/*
Analyze Simulated Data's Fit in Terms of Migration, Marriage, and Income Mobility Moments

Author: GA
Last Updated: 8/3/2019
*/

******Step 1: Prepare Comparison Data******
// Builds state-level benchmark moments (marriage, migration, college) from the
// ACS sample, merges them with the two Chetty state files, and saves the result
// as "$temp/eval_data".
// An earlier -use "$temp/destinations_simulated", clear- was removed from the top
// of this section: the very next -use- replaced it before anything read it.

//get migration and marriage rates
use "$temp/acs_master_kid_sample", clear //cleaned-up ACS sample

//married = 1 when marst is 1 or 2, and 0 otherwise (including missing marst)
//presumably IPUMS marst codes (married, spouse present/absent) -- TODO confirm
gen married = inlist(marst, 1, 2)

//migration indicator split by education; each copy is missing for the other group
gen move_coll = move if coll
gen move_hs = move if !coll

//collapse for migration and marriage moments (weighted means by birth state)
collapse (mean) married move move_coll move_hs coll [w = perwt], by(bpl)

//birth state becomes the merge key used by the Chetty files
ren bpl statefips
merge 1:1 statefips using "$temp/chetty_state_estimates", keep(match) nogen
merge 1:1 statefips using "$temp/chetty_state_data_round2", keep(match) nogen
keep state* iim married move two coll* kfr* move_coll move_hs

save "$temp/eval_data", replace //data with which to evaluate model fit


*******Step 2: Prepare Simulated Data******

// Load the baseline simulation output. The CSV is read without usable column
// names, so columns arrive as v1, v2, ... and are named below.
import delimited "$dir/Model/Simulated_data/simulated_data_base.csv", clear

// Column layout as written by the simulation code:
//            line = [fips[l] fips[lp] skill_prices[l] prim_2010.skill_prices[lp] m mp t e2p e3p h3p_val ap S_par S pops[l] mig e3_fam e3p_fam]
// NOTE(review): the list above has fewer fields than the 26 columns named here,
// so it may be out of date -- confirm against the simulation code.

// v3 and v4 are not used anywhere below
drop v3 v4

// give the remaining columns descriptive names
rename (v1 v2) (state_born state_move)
rename (v5 v6 v7 v8 v9 v10 v11 v12 v13) ///
	(parent_marr kid_marr parent_time_inv kid_earn_p2 kid_earn_p3 kid_hc_p3 ability coll_par coll_kid)
rename (v14 v15 v16 v17 v18 v19 v20 v21 v22) ///
	(weight mig type mig2 lp_norec state_p2 race region util)
rename (v23 v24 v25 v26) ///
	(parent_fam_earn kid_fam_earn parent_pctile kid_pctile)

// Candidate state-level mobility measures, each a mean of kid_pctile by birth state:
//   kid_pctile   : only observations with parent_pctile exactly 25
//   kid_pctile_2 : all observations with parent_pctile below 50
//   kid_pctile_3 : below 50, averaged within each parent percentile first and
//                  then across percentiles (every percentile counts equally)
// The three are merged with the benchmark data and saved as "$temp/iim_options".
// The simulated data in memory is left untouched (preserve/restore around each step).
tempfile iim_at25 iim_below50

// measure 1: parents at the 25th percentile
preserve
drop if parent_pctile != 25
collapse (mean) kid_pctile, by(state_born)
save `iim_at25'
restore

// measure 2: pooled mean over below-median parents
preserve
drop if parent_pctile >= 50
collapse (mean) kid_pctile, by(state_born)
rename kid_pctile kid_pctile_2
save `iim_below50'
restore

// measure 3: mean of per-percentile means over below-median parents
preserve
drop if parent_pctile >= 50
collapse (mean) kid_pctile, by(state_born parent_pctile)
collapse (mean) kid_pctile, by(state_born)
rename kid_pctile kid_pctile_3

// bring the three measures together with the benchmark data
merge 1:1 state_born using `iim_at25', keep(match) nogenerate
merge 1:1 state_born using `iim_below50', keep(match) nogenerate
rename state_born statefips
merge 1:1 statefips using "$temp/eval_data", keep(match) nogenerate
correlate iim kid_pctile_2
save "$temp/iim_options", replace
restore

// ---- Parental time investment, by parental marital status ----
noisily summarize parent_time_inv if parent_marr == 0
noisily summarize parent_time_inv if parent_marr == 1

// per-parent version: total investment is halved when parent_marr == 1
generate parent_time_ind = parent_time_inv
replace parent_time_ind = parent_time_ind/2 if parent_marr == 1
noisily summarize parent_time_ind if parent_marr == 1
noisily summarize parent_time_ind if parent_marr == 0

// ---- Quintile transition matrix (family earnings, parent vs. kid) ----
xtile parent_quint = parent_fam_earn, nquantiles(5)
xtile kid_quint = kid_fam_earn, nquantiles(5)

noisily tabulate kid_quint parent_quint, row

// ---- Most popular destination states ----
// attach state names to the destination state
rename state_move statefips
merge m:1 statefips using "$data/Crosswalks/state_fips_crosswalk", nogenerate keep(match)

// destinations of movers in the simulated data, most frequent first
noisily tabulate state_full if statefips!=state_born, sort //maryland, illinois, washington, colorado, new jersey, georgia california, virginia, michigan

// mover indicator: destination state differs from birth state
generate move_simul = (statefips!=state_born)

// ---- State-of-birth means of the simulated migration moments ----
drop statefips
rename state_born statefips
rename kid_marr married_simul
// NOTE(review): the `if coll_kid' / `if !coll_kid' splits treat any nonzero value as
// college; confirm coll_kid is coded 0/1 in the CSV (other parts of this file
// select on coll_par==2 and shift kid_marr by -1).
generate move_coll_simul = move_simul if coll_kid
generate move_hs_simul = move_simul if !coll_kid

// unweighted means by birth state; only the variables listed here survive
collapse (mean) move_simul move_coll_simul move_hs_simul coll_kid, by(statefips)

tempfile simul1
save `simul1'

****read in p25 simulated data
// NOTE(review): this loads the same simulated_data_base.csv as the baseline step
// above; the restriction to below-median parents happens right after the renames.
import delimited "$dir/Model/Simulated_data/simulated_data_base.csv", clear

// v3 and v4 are not used anywhere below
drop v3 v4

// name the remaining columns (same layout as the baseline import)
rename (v1 v2) (state_born state_move)
rename (v5 v6 v7 v8 v9 v10 v11 v12 v13) ///
	(parent_marr kid_marr parent_time_inv kid_earn_p2 kid_earn_p3 kid_hc_p3 ability coll_par coll_kid)
rename (v14 v15 v16 v17 v18 v19 v20 v21 v22) ///
	(weight mig type mig2 lp_norec state_p2 race region util)
rename (v23 v24 v25 v26) ///
	(parent_fam_earn kid_fam_earn parent_pctile kid_pctile)


// Builds the state-level simulated moments for children of below-median parents,
// merges them with the baseline simulated moments (`simul1'), the stayer-only
// mobility measure and the benchmark data, and saves
// "$temp/simulated_model_baseline".
// Variables created in this script are referred to by their full names so the
// section does not depend on variable-name abbreviation (-set varabbrev off-).

//keep only observations with parent percentile below 50
drop if parent_pctile>=50
noi tab parent_pctile
//shift kid_marr down by one (presumably from 1/2 to 0/1 coding -- TODO confirm)
replace kid_marr = kid_marr - 1

//per-person income: family earnings are halved when kid_marr == 1
gen kid_income_ind = kid_fam_earn
replace kid_income_ind = kid_income_ind/2 if kid_marr == 1

//NOTE(review): these split on parent_marr, which is not shifted like kid_marr above;
//confirm the 0/1 values match the coding in the CSV
su kid_income_ind if parent_marr == 0
su kid_income_ind if parent_marr == 1

//good correlation between stayers and leavers
//stayer-only mobility: mean kid_pctile by birth state for kids who did not move
preserve
keep if state_move==state_born
collapse (mean) kid_pctile, by(state_born)
ren kid_pctile kid_pctile_stayer
tempfile stayers
ren state_born statefips
save `stayers'
save "$temp/simulated_iim_stayers", replace
restore

ren state_born statefips
ren coll_par coll_par_p25
ren coll_kid coll_kid_p25
//unweighted state-of-birth means; the college variables keep their _p25 names
collapse (mean) parent_marr kid_marr kid_pctile coll_par_p25 coll_kid_p25, by(statefips)
merge 1:1 statefips using `stayers', nogen
noi corr kid_pctile_stayer kid_pctile //yes!
noi su kid_pctile*
//dropped here and merged back in below, restricted to matched states
drop kid_pctile_stayer

//merge with other data
ren kid_pctile iim_simul
ren parent_marr parent_marr_simul
ren kid_marr married_simul
merge 1:1 statefips using `simul1', keep(match) nogen
merge 1:1 statefips using `stayers', keep(match) nogen
merge 1:1 statefips using "$temp/eval_data", keep(match) nogen
//benchmark counterpart of parent_marr_simul
ren two parent_marr
//coll_pooled is expected to come from the merged benchmark data -- TODO confirm its exact name
keep state* iim* move* parent* married married_simul coll_par_p25 coll_kid* coll_pooled coll* kfr* kid_pctile*
//ren coll_kid coll_kid_simul
//ren coll_pooled coll_kid
merge 1:1 statefips using "$data/Crosswalks/state_fips_crosswalk", keep(match) nogen
save "$temp/simulated_model_baseline", replace


*************Step 3: Fit Evaluation*************
// Reports correlations between benchmark moments and their simulated counterparts.
use "$temp/simulated_model_baseline", clear
//replace coll_kid_simul = coll_kid_simul -1

// For each stem, correlate every variable whose name starts with it
// (e.g. iim with iim_simul).
local moment_stubs iim move married parent_marr
foreach stub of local moment_stubs {
	noisily correlate `stub'*
}

// cross-moment correlations, simulated and benchmark
noisily correlate parent_marr_simul iim_simul
noisily correlate parent_marr iim
noisily correlate married_simul iim_simul

// college attendance
noisily correlate coll_kid*
summarize coll_kid*
noisily correlate coll_kid coll


//evaluate college fit
// State maps of each benchmark moment next to its simulated counterpart. Both maps
// in a pair share the same cut values; the benchmark map is drawn without a legend.
// An unmatched closing brace used to follow this section. With no opening brace
// before it, Stata stopped there with an error and the sections after the maps
// never ran, so it has been removed.


//maptile iim_simul, geo(state) n(9) rev spopt(mos(thick)) legdecimals(2) 
maptile iim, geo(state) rev spopt(mos(thick) legenda(off)) legdecimals(1) cutv(36 40 42 44 46 50)
graph export "$output/fig5_iim_standard.png", replace 
maptile iim_simul, geo(state) rev spopt(mos(thick) ) legdecimals(1) cutv(36 40 42 44 46 50)
graph export "$output/fig5_iim_simul_standard.png", replace
graph close

//stayer-only mobility; the benchmark is rescaled by 100 before mapping
//NOTE(review): kfr_sty looks like an abbreviation of kfr_stycz_pooled_pooled_p25 -- confirm
replace kfr_sty = kfr_sty * 100
maptile kfr_stycz_pooled_pooled_p25, geo(state) rev spopt(mos(thick) legenda(off)) legdecimals(1) cutv(36 40 42 44 46 50)
graph export "$output/fig5_iim_standard_stayer.png", replace 
maptile kid_pctile_stayer, geo(state) rev spopt(mos(thick) ) legdecimals(1) cutv(36 40 42 44 46 50)
graph export "$output/fig5_iim_simul_standard_stayer.png", replace
graph close

//migration rates: pooled, high school, college (same cut values for all three pairs)
local move_cuts 0.25 0.325 0.4 0.475 0.55 0.625
foreach mv in move move_hs move_coll {
	maptile `mv', geo(state) rev spopt(mos(thick) legenda(off)) legdecimals(2) cutv(`move_cuts')
	graph export "$output/fig5_`mv'_standard.png", replace 
	maptile `mv'_simul, geo(state) rev spopt(mos(thick)) legdecimals(2) cutv(`move_cuts')
	graph export "$output/fig5_`mv'_simul_standard.png", replace
	graph close
}

//college attendance
maptile coll, geo(state) rev spopt(mos(thick) legenda(off)) legdecimals(2) cutv(0.25 0.3 0.35 0.4 0.45)
graph export "$output/fig5_coll_standard.png", replace 
maptile coll_kid, geo(state) rev spopt(mos(thick)) legdecimals(2) cutv(0.25 0.3 0.35 0.4 0.45)
graph export "$output/fig5_coll_simul_standard.png", replace
graph close

//marriage rates
maptile married, geo(state) rev spopt(mos(thick) legenda(off)) legdecimals(2) cutv(0.525 0.55 0.575 0.6 0.625 0.65 0.675)
graph export "$output/fig5_married_standard.png", replace 
maptile married_simul, geo(state) rev spopt(mos(thick)) legdecimals(2) cutv(0.525 0.55 0.575 0.6 0.625 0.65 0.675)
graph export "$output/fig5_married_simul_standard.png", replace
graph close



// ---- Tables for college attendance breakdowns ----
// Re-read the simulated data from disk; the Step 3 dataset in memory is replaced.
// NOTE(review): despite the original "p25" label, this is the same
// simulated_data_base.csv used in Step 2.
import delimited "$dir/Model/Simulated_data/simulated_data_base.csv", clear

// v3 and v4 are not used anywhere below
drop v3 v4

// name the remaining columns (same layout as the Step 2 imports)
rename (v1 v2) (state_born state_move)
rename (v5 v6 v7 v8 v9 v10 v11 v12 v13) ///
	(parent_marr kid_marr parent_time_inv kid_earn_p2 kid_earn_p3 kid_hc_p3 ability coll_par coll_kid)
rename (v14 v15 v16 v17 v18 v19 v20 v21 v22) ///
	(weight mig type mig2 lp_norec state_p2 race region util)
rename (v23 v24 v25 v26) ///
	(parent_fam_earn kid_fam_earn parent_pctile kid_pctile)

// College attendance of kids by ability (columns) and parent income quartile (rows),
// for one parental education group.
//
// Parent quartile from the parent percentile: 1 = below 25, 2 = 25-49, 3 = 50-74,
// 4 = 75 and above. Each comparison adds 1 when true. A percentile of 100 (or more)
// is counted in quartile 4, and a missing percentile gives a missing quartile;
// previously both fell into quartile 1.
gen parent_quart = 1 + (parent_pctile>=25) + (parent_pctile>=50) + (parent_pctile>=75) if !missing(parent_pctile)


//observations without a quartile are left out of the table
keep if coll_par==2 & !missing(parent_quart) //or coll_par==1, depending on which one you want to reproduce
//weighted mean of coll_kid in each ability x quartile cell
collapse (mean) coll_kid [w=weight], by(ability parent_quart)
//one row per quartile, one coll_kid column per ability value
reshape wide coll_kid, i(parent_quart) j(ability)
br

//broadly speaking, we're underestimating college attendance for lower-income kids. Try adding in financial aid.




// ---- Marriage probabilities ----
// Plots marriage probability against human capital, high school vs. college,
// one figure per state in the list below.
import delimited "$dir/Model/Simulated_data/marriage_probs.csv", clear

// Column 1 is human capital; the remaining columns come in (high school, college)
// pairs, one pair per state, in the order of this list.
local state_list ca ia ms ny tx ut

rename v1 hc
local col = 2
foreach st of local state_list {
	rename v`col' prob_marr_hs_`st'
	local ++col
	rename v`col' prob_marr_coll_`st'
	local ++col
}

// restrict the plotted range to hc of at most 3 (missing hc is dropped as well)
keep if hc<=3

// one exported figure per state
foreach st of local state_list {
	twoway line prob_marr_hs_`st' hc || line prob_marr_coll_`st' hc, graphregion(color(white)) bgcolor(white) ylabel(0(0.1)1.0) legend(lab(1 "High School") lab(2 "College")) ///
	ytitle("Marriage Probability") xtitle("Human Capital")
	graph export "$output/marr_probs_`st'.png", replace
}


